********************************************************************************
**	PURPOSE: Selection Effects by take-up timing, create T9
**							
**	INPUTS: Long with outcomes.dta - Panel A
**			Wide with outcomes.dta - Panel B
**	
**	OUTPUTS: Selection Effects Maina_cbl.xls - Panel A
**			 Selection Effects Maind_cbl.xls - Panel A
**			 Selection Effects Maina_extra.xls - Panel A
**			 Selection Effects Maind_extra.xls - Panel A
**			 Take up Timing.csv - Panel B
**				
**	CREATED/EDITED BY: Kayla Wilding, Leah Kim, Hasan Ahamed
**
**	DATE CREATED: 
**
**	DATE LAST EDITED: 2/28/2023
********************************************************************************
clear all

/*******************************************************************************
1. Panel A (Column )
********************************************************************************/

**Tests for Selection Effects into CBL Loans
*1. Filename and table title
local decs 3
local filename "Selection Effects Main"

*2. Define the covariate regression spec options and the outcome variables of interest
local outcomes scoredf ficoscore08
local testcontrols bfico_post fs081_400_post fs081_500_post fs081_600_post

local pana 1.post 1.op18#1.post
local pand 1.post 1.post#1.open_sdsame 1.post#1.open_sdsame_30 1.post#1.open_sd30d_after

local keepa 1.op18#1.post
local keepd 1.post#1.open_sdsame 1.post#1.open_sdsame_30 1.post#1.open_sd30d_after

local testd 1.post#1.open_sdsame = 1.post#1.open_sdsame_30 = 1.post#1.open_sd30d_after

*3. Import data and create variables related to timing of take up 
use "$adta/Long with outcomes.dta", clear 

*After 30 days
gen open_sd30d_after = ~ open_sd30d if op18 == 1
replace open_sd30d_after = 0 if missing(open_sd30d_after)
tab open_sd30d open_sd30d_after if flag_randomized ==1 & index == 1

*On same day as offer
gen open_sdsame_after = ~ open_sdsame if op18 == 1
replace open_sdsame_after  = 0 if missing(open_sdsame_after)
tab open_sdsame open_sdsame_after if flag_randomized ==1 & index == 1

*From Days 2-30
gen open_sdsame_30 = open_sd30d if op18 == 1
replace open_sdsame_30 = 0 if open_sdsame == 1
replace open_sdsame_30 = 0 if missing(open_sdsame_30)
tab open_sdsame_30 if flag_randomized == 1 & index == 1

foreach group in flag_cblgroup flag_extragroup {

	label var scoredf "Scored on FICO = 1"
	label var ficoscore08 "FICO Score"
	
	local g : subinstr local group "flag_" ""
	local g : subinstr local g "group" ""


	foreach l in a d {
		local t 1
		foreach var in `outcomes' {
			
			local controls 			// reset the control local
			local controlss 
				foreach control in `testcontrols' {
				*Standardize each control variable
				*String of locals were attached to each variable name because they should be in the dataset to compare coefficients
				*Without this, we can't use the suest command
					gen `control'_`g'`l'`t' = `control'
					sum `control'_`g'`l'`t' if `=cond("`var'" == "ficoscore08", "flag_matched_scored_base", "flag_matched")' == 1 & !mi(`var') & `group' == 1
					gen `control'stan_`g'`l'`t' = (`control'_`g'`l'`t' - `r(mean)')/`r(sd)' if `group' == 1
					assert mi(`control'_`g'`l'`t') if mi(`control'stan_`g'`l'`t')  & `group' == 1
					assert mi(`control'stan_`g'`l'`t') if mi(`control'_`g'`l'`t')  & `group' == 1
				*Generate a missing dummy for each control and replace the missing obs with 0 for both the standardized control var and regular control var 
					gen mi`control'_`g'`l'`t' = (mi(`control'_`g'`l'`t')) if `group' == 1
					replace `control'_`g'`l'`t' = 0 if mi(`control'_`g'`l'`t') & `group' == 1
					replace `control'stan_`g'`l'`t' = 0 if mi(`control'stan_`g'`l'`t')  & `group' == 1
				*Create a local of standardized controls and missing dummies for the pdslasso
				local controlss `controlss' `control'stan_`g'`l'`t' mi`control'_`g'`l'`t'
				}

				cap drop __*
				xtset surveyid 
				*run the pdslasso with the standardized control list 
								
				pdslasso `var' `pan`l'' (`controlss')  if `group' == 1 , fe robust 
				local controls `e(xselected)'
				*Remove the stan to use the controls in the regressions
				local controls = subinstr("`controls'", "stan","",.)
				*Include the missing dumy for each selected control 
				
			foreach selected in `controls' {

					if strpos("`selected'", "mi") == 1 {
						continue 
					}
					else {
						local miselected mi`selected'
						local included : list posof "`miselected'" in controls
						cap assert `included' == 0
							if !_rc {	//if the missing dummy is not in the select vars
								local controls `controls' mi`selected'
							}
							else {	//if the missing dummy is already in the selected vars, continue 
								local controls `controls'
							}
						}
					}
					
			noi di as r "controls for `group', outcome `var', specification `l' -- `controls'" 

				*Mark the sample 
				areg `var'  `pan`l'' `controls' if `=cond("`var'" == "ficoscore08", "flag_matched_scored_base", "flag_matched")' == 1  & `group' == 1, absorb(surveyid) vce(cluster surveyid)
				tempvar controlsample
				gen `controlsample' = e(sample)  if `group' == 1
				
				*No controls
				fvset base none surveyid // survey id base is different for cbl and extra group, for suest command to work we must match the base ID
				reg `var'  `pan`l'' i.surveyid if `controlsample' == 1  & `=cond("`var'" == "ficoscore08", "flag_matched_scored_base", "flag_matched")' == 1  & `group' == 1				
				est store `g'_`l'_`var'_ctrln
				*noi display "`g'_`l'_`var'_ctrln"
				areg `var'  `pan`l'' if `controlsample' == 1  & `=cond("`var'" == "ficoscore08", "flag_matched_scored_base", "flag_matched")' == 1  & `group' == 1, absorb(surveyid) vce(cluster surveyid)
				if "`l'" ~= "a" {
					test `test`l''
					loc pval : di %6.2f `r(p)'
				}

				local obs = e(N)
				local inds = e(df_a) + 1
				sum `var' if e(sample) == 1 & index == 1 
				local mu_lhs = string(r(mean), "%14.`=cond("`var'" == "ficoscore08", "0", "`decs'")'f") 

				unique surveyid if op18 == 1 & `controlsample' == 1 
				local numopen = r(unique)
				outreg2 using "$outputtables/`filename'`l'_`g'.xls", keep(`keep`l'') excel label nocons noobs dec(`decs') nor2 slow(8000) nonote noaster addtext(Controls Included?, "No", Number of open, `numopen', Obs, `obs', Individuals, `inds', Mean Dependent Variable in CBL Group at Baseline, `mu_lhs' `=cond("`l'"~="a", ",P-value, `pval'","")') `=cond(`t' == 1,"replace","append")'
				
				**With controls
				fvset base none surveyid
				reg `var'  `pan`l'' `controls' i.surveyid if `=cond("`var'" == "ficoscore08", "flag_matched_scored_base", "flag_matched")' == 1 &  `group' == 1
				est store `g'_`l'_`var'_ctrly
				*noi display "`g'_`l'_`var'_ctrly"
				areg `var'  `pan`l'' `controls' if `=cond("`var'" == "ficoscore08", "flag_matched_scored_base", "flag_matched")' == 1  & `group' == 1, absorb(surveyid) vce(cluster surveyid)
				if "`l'" ~= "a" {
					test `test`l''
					loc pval : di %6.2f `r(p)'
				}
				
				local obs = e(N)
				local inds = e(df_a) + 1
				sum `var' if e(sample) == 1 & index == 1
				local mu_lhs = string(r(mean), "%14.`=cond("`var'" == "ficoscore08", "0", "`decs'")'f") 
				unique surveyid if op18 == 1 & `controlsample' == 1 
				local numopen = r(unique)
			
				outreg2 using "$outputtables/`filename'`l'_`g'.xls",  append keep(`keep`l'') sortvar(1.op18#1.post) excel label nocons noobs dec(`decs') nor2 slow(8000) nonote noaster addtext(Controls Included?, "Yes", Number of open, `numopen', Obs, `obs', Individuals, `inds', Mean Dependent Variable in CBL Group at Baseline, `mu_lhs' `=cond("`l'"~="a", ",P-value, `pval'","")')
				
			local ++t
		}
	}
}


foreach l in a d { 
	
 if "`l'" == "a" {
 	loc n 1
 	mat pval`l' = J(1,4,.)
 }
 else {
 	loc n 3
 	mat pval`l' = J(3,4,.)	
 }
		
		loc j 1
		
qui foreach o of local outcomes {
		
		*We use suest command to compare coefficients across groups (CBL vs Extra-Step)
		suest cbl_`l'_`o'_ctrln extra_`l'_`o'_ctrln, vce(cluster surveyid) 
		forval i = 1/`n' {
		local coef_s : word `i' of `keep`l''
		test [cbl_`l'_`o'_ctrln_mean]`coef_s' = [extra_`l'_`o'_ctrln_mean]`coef_s'
		mat pval`l'[`i',`j'] = r(p)
		}
		
		*noi display `j'
		*noi display "outcome `o', specification `l', control no"

		
		suest cbl_`l'_`o'_ctrly extra_`l'_`o'_ctrly, vce(cluster surveyid)
		forval i = 1/`n' {
		local coef_s : word `i' of `keep`l''
		test [cbl_`l'_`o'_ctrly_mean]`coef_s' = [extra_`l'_`o'_ctrly_mean]`coef_s'
		mat pval`l'[`i',`j'+1] = r(p)
		}
		
		*noi display `j'+1
		*noi display "outcome `o', specification `l', control yes"
		loc j `j'+2
		
		*noi display `j'
	}
	
}

*table 9 column 5 and 10
putexcel set "$outputtables/Selection Effects Main_pval.xls", replace
putexcel A1 = "p-value-not reported"
putexcel B1 = "p-value-c5"
putexcel C1 = "p-value-not reported"
putexcel D1 = "p-value-c-10"
putexcel A2 = matrix(pvala), nformat(number_d2)
putexcel A3 = matrix(pvald), nformat(number_d2)

/*******************************************************************************
2. Panel B
********************************************************************************/

est clear
use "$adta/Wide with outcomes.dta", clear

*section B of table 9
gen open_sdsame_30d = open_sd30d
replace open_sdsame_30d = 0 if open_sdsame 

gen open_sd30d_after = ~open_sd30d if op18 == 1
replace open_sd30d_after = 0 if missing(open_sd30d_after)

*calculating number of take-up on different periods
foreach var in op18 open_sdsame open_sdsame_30d open_sd30d_after {
count  if flag_randomized == 1 & flag_cblgroup == 1
loc tot_cbl = r(N)

count  if flag_randomized == 1 & flag_cblgroup == 1 & `var' == 1
loc tot_cbl_`var' = r(N)

loc per_cbl_`var' = string((`tot_cbl_`var''/`tot_cbl')*100, "%14.0f") + "%"

count  if flag_randomized == 1 & flag_cblgroup == 0
loc tot_nocbl = r(N)

count  if flag_randomized == 1 & flag_cblgroup == 0 & `var' == 1
loc tot_nocbl_`var' = r(N)

loc per_nocbl_`var' = string((`tot_nocbl_`var''/`tot_nocbl')*100, "%14.0f") + "%"

count if `var'==1 & flag_randomized == 1
loc n_`var' = r(N)
}

putexcel set "$outputtables/Take up Timing.xls", replace
putexcel A1 = "Treatment Arms"
putexcel B1 = "All took up"
putexcel C1 = "Took up on same day"
putexcel D1 = "Took up between 2 to 30"
putexcel E1 = "Took up after 30 days"

putexcel A2 = "Take up in CBL Arm"
putexcel A3 = "Take up in Extra Step Arm"
putexcel A4 = "N"


putexcel B2 = "`per_cbl_op18'"
putexcel C2 = "`per_cbl_open_sdsame'"
putexcel D2 = "`per_cbl_open_sdsame_30d'"
putexcel E2 = "`per_cbl_open_sd30d_after'"

putexcel B3 = "`per_nocbl_op18'"
putexcel C3 = "`per_nocbl_open_sdsame'"
putexcel D3 = "`per_nocbl_open_sdsame_30d'"
putexcel E3 = "`per_nocbl_open_sd30d_after'"

putexcel B4 = "`n_op18'"
putexcel C4 = "`n_open_sdsame'"
putexcel D4 = "`n_open_sdsame_30d'"
putexcel E4 = "`n_open_sd30d_after'"


**EOF***
